{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Discrete versus Real AdaBoost\n\nThis example is based on Figure 10.2 from Hastie et al. (2009) [1]_ and\nillustrates the difference in performance between the discrete SAMME [2]_\nboosting algorithm and the real SAMME.R boosting algorithm. Both algorithms\nare evaluated on a binary classification task where the target Y is a\nnon-linear function of 10 input features.\n\nDiscrete SAMME AdaBoost updates the sample weights based on errors in the\npredicted class labels, whereas real SAMME.R uses the predicted class\nprobabilities.\n\n.. [1] T. Hastie, R. Tibshirani and J. Friedman, \"Elements of Statistical\n    Learning Ed. 2\", Springer, 2009.\n\n.. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n"
      ]
    },
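    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "To make the difference concrete, the next cell gives a minimal, binary-case sketch of the two re-weighting rules: a \"discrete\" step that re-weights samples using only whether the stump's predicted label is wrong, and a \"real\" step that re-weights them using the stump's predicted class probability. It is an illustration of the idea only, not scikit-learn's internal implementation, and the helper names (`discrete_boost_step`, `real_boost_step`) are made up for this notebook.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Schematic, binary-case sketch of the two re-weighting rules (illustrative only;\n# this is not how scikit-learn implements SAMME / SAMME.R internally).\nimport numpy as np\n\nfrom sklearn.datasets import make_hastie_10_2\nfrom sklearn.tree import DecisionTreeClassifier\n\n\ndef discrete_boost_step(X, y, sample_weight):\n    # One discrete (SAMME) step: re-weight using the predicted labels only.\n    stump = DecisionTreeClassifier(max_depth=1)\n    stump.fit(X, y, sample_weight=sample_weight)\n    miss = stump.predict(X) != y                       # which predictions are wrong\n    err = np.clip(np.average(miss, weights=sample_weight), 1e-6, 1 - 1e-6)\n    alpha = np.log((1.0 - err) / err)                  # stump weight (binary case: no log(K - 1) term)\n    new_weight = sample_weight * np.exp(alpha * miss)  # up-weight only the misclassified samples\n    return alpha, new_weight / new_weight.sum()\n\n\ndef real_boost_step(X, y, sample_weight):\n    # One real (SAMME.R-style) step: re-weight using predicted class probabilities.\n    stump = DecisionTreeClassifier(max_depth=1)\n    stump.fit(X, y, sample_weight=sample_weight)\n    proba = np.clip(stump.predict_proba(X)[:, 1], 1e-6, 1 - 1e-6)\n    f = 0.5 * np.log(proba / (1.0 - proba))            # real-valued stump output\n    new_weight = sample_weight * np.exp(-y * f)        # y is encoded as -1 / +1\n    return f, new_weight / new_weight.sum()\n\n\n# Tiny usage example on the same kind of data as the main example below.\nX_demo, y_demo = make_hastie_10_2(n_samples=200, random_state=0)\nw0 = np.full(X_demo.shape[0], 1.0 / X_demo.shape[0])\nalpha, w_discrete = discrete_boost_step(X_demo, y_demo, w0)\n_, w_real = real_boost_step(X_demo, y_demo, w0)\nprint('discrete stump weight (alpha):', alpha)\nprint('largest sample weight after one step (discrete, real):',\n      w_discrete.max(), w_real.max())"
      ]
    },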
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\n# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>,\n#         Noel Dawe <noel.dawe@gmail.com>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.metrics import zero_one_loss\nfrom sklearn.ensemble import AdaBoostClassifier\n\n\nn_estimators = 400\n# A learning rate of 1. may not be optimal for both SAMME and SAMME.R\nlearning_rate = 1.\n\nX, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)\n\n# Train on the first 2,000 samples and test on the remaining 10,000,\n# as in Figure 10.2 of Hastie et al. (2009).\nX_test, y_test = X[2000:], y[2000:]\nX_train, y_train = X[:2000], y[:2000]\n\n# Baselines: a single decision stump and a deeper decision tree.\ndt_stump = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)\ndt_stump.fit(X_train, y_train)\ndt_stump_err = 1.0 - dt_stump.score(X_test, y_test)\n\ndt = DecisionTreeClassifier(max_depth=9, min_samples_leaf=1)\ndt.fit(X_train, y_train)\ndt_err = 1.0 - dt.score(X_test, y_test)\n\nada_discrete = AdaBoostClassifier(\n    base_estimator=dt_stump,\n    learning_rate=learning_rate,\n    n_estimators=n_estimators,\n    algorithm=\"SAMME\")\nada_discrete.fit(X_train, y_train)\n\nada_real = AdaBoostClassifier(\n    base_estimator=dt_stump,\n    learning_rate=learning_rate,\n    n_estimators=n_estimators,\n    algorithm=\"SAMME.R\")\nada_real.fit(X_train, y_train)\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nax.plot([1, n_estimators], [dt_stump_err] * 2, 'k-',\n        label='Decision Stump Error')\nax.plot([1, n_estimators], [dt_err] * 2, 'k--',\n        label='Decision Tree Error')\n\n# Test and train error at every boosting iteration for both variants.\nada_discrete_err = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_discrete.staged_predict(X_test)):\n    ada_discrete_err[i] = zero_one_loss(y_test, y_pred)\n\nada_discrete_err_train = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_discrete.staged_predict(X_train)):\n    ada_discrete_err_train[i] = zero_one_loss(y_train, y_pred)\n\nada_real_err = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_real.staged_predict(X_test)):\n    ada_real_err[i] = zero_one_loss(y_test, y_pred)\n\nada_real_err_train = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_real.staged_predict(X_train)):\n    ada_real_err_train[i] = zero_one_loss(y_train, y_pred)\n\nax.plot(np.arange(n_estimators) + 1, ada_discrete_err,\n        label='Discrete AdaBoost Test Error',\n        color='red')\nax.plot(np.arange(n_estimators) + 1, ada_discrete_err_train,\n        label='Discrete AdaBoost Train Error',\n        color='blue')\nax.plot(np.arange(n_estimators) + 1, ada_real_err,\n        label='Real AdaBoost Test Error',\n        color='orange')\nax.plot(np.arange(n_estimators) + 1, ada_real_err_train,\n        label='Real AdaBoost Train Error',\n        color='green')\n\nax.set_ylim((0.0, 0.5))\nax.set_xlabel('n_estimators')\nax.set_ylabel('error rate')\n\nleg = ax.legend(loc='upper right', fancybox=True)\nleg.get_frame().set_alpha(0.7)\n\nplt.show()"
      ]
    },
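    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "As a cross-check, the cell below recomputes the staged test errors with `staged_score` (which reports the accuracy of the ensemble at each boosting iteration) and compares them with the `zero_one_loss` curves computed above; the two should agree up to floating-point rounding.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Cross-check: staged_score gives the staged accuracy directly, so 1 - score\n# should reproduce the staged zero-one test errors computed above.\ndiscrete_test_err = 1.0 - np.fromiter(\n    ada_discrete.staged_score(X_test, y_test), dtype=float)\nreal_test_err = 1.0 - np.fromiter(\n    ada_real.staged_score(X_test, y_test), dtype=float)\n\nprint('max |difference| (discrete):', np.abs(discrete_test_err - ada_discrete_err).max())\nprint('max |difference| (real):', np.abs(real_test_err - ada_real_err).max())"
      ]
    }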
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}